package com.heima.utils;

import net.sourceforge.tess4j.ITesseract;
import net.sourceforge.tess4j.Tesseract;
import net.sourceforge.tess4j.TesseractException;

import java.awt.image.BufferedImage;

/**
 * Static helpers for running Tesseract OCR (through Tess4J) on in-memory images
 * and flattening the recognized text onto a single line.
 */
public class OcrUtil {
    /**
     * Default value handed to {@code ITesseract.setDatapath}. This is an absolute,
     * machine-specific Windows path; callers running on another machine should use
     * {@link #doOcr(BufferedImage, String, String)} and supply their own path.
     */
    private static final String DATA_PATH = "D:\\workspace\\cp392\\leadnews-base\\";

    /** Default language code handed to {@code ITesseract.setLanguage} (Chinese recognition). */
    private static final String LANGUAGE = "chi_sim";

    /**
     * Recognizes the text in {@code image} using the default data path and language.
     *
     * @param image the image to run OCR on
     * @return the recognized text with every CR/LF character replaced by {@code "-"}
     *         and every space character removed
     * @throws TesseractException if Tess4J reports a failure during recognition
     */
    public static String doOcr(BufferedImage image) throws TesseractException {
        return doOcr(image, DATA_PATH, LANGUAGE);
    }

    /**
     * Recognizes the text in {@code image} using a caller-supplied data path and language.
     *
     * @param image    the image to run OCR on
     * @param dataPath value passed to {@code ITesseract.setDatapath}
     * @param language value passed to {@code ITesseract.setLanguage}, e.g. {@code "chi_sim"}
     * @return the recognized text with every CR/LF character replaced by {@code "-"}
     *         and every space character removed
     * @throws TesseractException if Tess4J reports a failure during recognition
     */
    public static String doOcr(BufferedImage image, String dataPath, String language)
            throws TesseractException {
        // A fresh engine instance is created per call, so no state is shared between calls.
        ITesseract engine = new Tesseract();
        engine.setDatapath(dataPath);
        engine.setLanguage(language);
        return toSingleLine(engine.doOCR(image));
    }

    /**
     * Collapses OCR output onto one line: each carriage return and each line feed
     * becomes a {@code '-'}, and all space characters are dropped.
     */
    private static String toSingleLine(String text) {
        // Literal replacements only; no regular expression needs compiling per call.
        return text.replace('\r', '-').replace('\n', '-').replace(" ", "");
    }
}